/******************************************************************************
Author: Akshay Dixit
Date Created: September 4, 2018
Data last modified: November 10, 2020
File Name: 2_tz.do
Project: T4D Tanzania 
Purpose: Analysis of primary outcomes
******************************************************************************/

/* LIST OF PRIMARY OUTCOMES & THE ASSOCIATED VARIABLES

RQ.1 Uptake of health services
	
	1. First ANC visit within the first trimester - pf04 pf05e_1
	2. Four or more ANC visits - pf04a_card pf04a_moth pf04a_high
	3. Delivery with a skilled attendant - pf15
	4. Delivery at a health facility - sk09
	5. Postpartum & Postnatal care - pf39 pf40
		
RQ.2 Content of health services: Combine into an index
	
	1. Antenatal content of care - pf06a pf06b pf06c pf06d pf06e pf06f pf06g pf06h pf06i pf06j pf06k pf07 pf08
	2. Delivery content of care - pf17 pf18 pf19 pf20 pf21 pf22
	2. Postpartum content of care (mother) - 
		a. pf41_1_a pf41_1_b pf41_1_c pf41_1_d
		b. pf41_2_a pf41_2_b pf41_2_c pf41_2_d
		c. pf41_3_a pf41_3_b pf41_3_c pf41_3_d
		d. pf41_4_a pf41_4_b pf41_4_c pf41_4_d
		e. pf41_5_a pf41_5_b pf41_5_c pf41_5_d
		f. pf41_6_a pf41_6_b pf41_6_c pf41_6_d
		g. pf41_7_a pf41_7_b pf41_7_c pf41_7_d
		h. pf41_8_a pf41_8_b pf41_8_c pf41_8_d
		i. pf41_13_a pf41_13_b pf41_13_c pf41_13_d
	3. Postnatal content of care (newborn) - 
		a. pf43_1_a pf43_1_b pf43_1_c pf43_1_d
		b. pf43_2_a pf43_2_b pf43_2_c pf43_2_d
		c. pf43_3_a pf43_3_b pf43_3_c pf43_3_d
		d. pf43_4_a pf43_4_b pf43_4_c pf43_4_d
		e. pf43_5_a pf43_5_b pf43_5_c pf43_5_d
		f. pf43_6_a pf43_6_b pf43_6_c pf43_6_d
		
RQ.3 Health outcomes

	1. Weight - pf49
	2. Height - pf50
		Age - sk02a sk02b
		Sex - sk05
		
RQ.4 Empowerment

	1. Participation - ks03 ks07 ks09_a ks09_b ks09_c ks09_d ks09_e ks09_f ks09_g
	2. Perceptions of empowerment - ks11, adjusted using ks12a ks12b ks12c ks13	
	

*/

clear all
set more off

********************************************************************************

* PREPARING PRIMARY OUTCOMES - RQ.1

u "$data/T4D_end_hh_clean.dta", clear

desc pf04a_card pf04a_moth pf04a_high pf04 pf04_2 pf05e_1 pf15 sk09 pf39 pf40
local utilization pf04a_card pf04a_moth pf04a_high pf04 pf05e_1 pf15 sk09 pf39 pf40
foreach var of local utilization {
	tab `var'
}

***First ANC visit within the first trimester***

tab pf02	//In 1088 cases, woman reported receiving ANC but didn't have the ANC card

	*Recode don't know/refusal as missing
	foreach var in pf01 pf02 pf04 pf04_2 {
		replace `var' = . if `var' == .d | `var' == .n | `var' == .r
	}

g timing_anc_visit = pf04	//Information from card
replace timing_anc_visit = pf04_2 if timing_anc_visit == .	//Information from mother where card not available
count if timing_anc_visit == . & pf01 == 1	//117 missing values excluding women who received no ANC care whatsoever

g anc_first_trimester = (timing_anc_visit <= 13)
replace anc_first_trimester = . if timing_anc_visit == .
tab anc_first_trimester		//23% women had their first ANC visit within the first trimester

	//Define this outcome to include care from skilled providers only
replace anc_first_trimester = 0 if pf05e_1 == 4 | pf05e_1 == 5 | pf05e_1 == 95


***Four or more ANC visits***

tab pf01 //63 received no ANC whatsoever - code these as 0 in the number of ANC visits
	
	*Recode don't know/refusal as missing
	foreach var in pf04a_card pf04a_moth {
		replace `var' = . if `var' == .d | `var' == .n | `var' == .r | `var' == 95
	}

g number_anc_visits = pf04a_card
replace number_anc_visits = pf04a_moth if number_anc_visits == .
tab number_anc_visits
count if number_anc_visits == . & pf01 == 1	//71 missing values, excluding women who received no ANC care whatsoever

g number_anc_visits_binary = (number_anc_visits >= 4)
replace number_anc_visits_binary = . if number_anc_visits == .
replace number_anc_visits_binary = 0 if pf01 == 2

	//Define this outcome to include care from skilled providers only
g unskilled_anc_provider = (pf05e_1 == 4 | pf05e_1 == 5 | pf05e_1 == 95 | ///
							pf05e_2 == 4 | pf05e_2 == 5 | pf05e_2 == 95 | ///
							pf05e_3 == 4 | pf05e_3 == 5 | pf05e_3 == 95 | ///
							pf05e_4 == 4 | pf05e_4 == 5 | pf05e_4 == 95)
tab unskilled_anc* 				//Just 21 cases where at least one of the first 4 visits was with an unskilled provider
replace number_anc_visits_binary = 0 if unskilled_anc_provider == 1
tab number_anc_visits_binary	//52% women had 4 or more ANC visits


***Delivery with a skilled attendant***

g skilled_provider = substr(pf15,1,1)
g skilled_provider_birth = (skilled_provider == "A" | skilled_provider == "B" | skilled_provider == "C") 	
		//Doctor/Clinical Officer, Nurse/Midwife/Other health facility personnel, Assistant Medical Officer
replace skilled_provider_birth = . if skilled_provider == "Y"	
		//8 Don't Know responses coded as missing
drop skilled_provider
tab skilled_provider	//68% births with a skilled provider


***Delivery at a facility***

g facility_birth = (sk09 == 1)
tab facility_birth		//67% births at a facility


***Postpartum & Postnatal care***

	*Recode don't know/refusal as missing
	foreach var in pf39 pf40 {
		replace `var' = . if `var' == .d | `var' == .n | `var' == .r
	}
	//11 and 21 missing values in pf39 and pf40, respectively
	
g postpartum_care = (pf39 == 1)
replace postpartum_care = . if pf39 == .
tab postpartum_care		//35% women reported receiving postpartum care

g postnatal_care = (pf40 == 1)
replace postnatal_care = . if pf40 == .
tab postnatal_care		//49% women reported receiving postnatal care

g postpartum_postnatal_care = (pf39 == 1 & pf40 == 1)
replace postpartum_postnatal_care = . if (pf39 == . | pf40 == .) 
tab postpartum_postnatal_care	//31% women reported receiving both postpartum & postnatal care
		
********************************************************************************

* PREPARING PRIMARY OUTCOMES - RQ.2

***Create/clean the necessary variables***
	
	*Antenatal content of care
local acc pf06a pf06b pf06c pf06d pf06e pf06f pf06g pf06i pf06j pf07 pf08
foreach var of local acc {
	replace `var' = . if `var' == .d | `var' == .n | `var' == .r
	replace `var' = 0 if `var' == 2
	tab `var'
}

g pf06jk = (pf06j == 1 & pf06k == 1) & pf06j != . & pf06k != .
replace pf06jk = . if pf06j == .

g pf06gh = (pf06g == 1 & pf06h >= 90) & pf06g != . & pf06h != .
replace pf06gh = . if pf06g == .

count if pf06a == . & pf06b == . & pf06c == . & pf06d == . & pf06e == . & pf06f == . & pf06g == . & pf06i == . & pf06j == . & pf07 == . & pf08 == .
	//64 missing, which is more or less consistent with the number of women who got no ANC at all
count if pf06a == . & pf06b == . & pf06c == . & pf06d == . & pf06e == . & pf06f == . & pf06g == . & pf06i == . & pf06j == . & pf07 == . & pf08 == . & pf01 == 1
	//0 cases where the woman got ANC but all sub-components were missing, so this looks good.
	
	
	*Delivery content of care
desc pf17 pf18 pf19 pf20 pf21 pf22
tab pf15	//101 cases where no one assisted with the delivery. These are all missing delivery content of care.
local dcc pf17 pf18 pf19 pf20 pf21 pf22
foreach var of local dcc {
		di "`var'"
		replace `var' = . if `var' == .d | `var' == .n | `var' == .r
		replace `var' = 0 if `var' == 2
		tab `var'
	}
replace pf19 = 1 if pf19 == . & sk09 == 1
	//For births at a facility, it is assumed that PF 19 = Yes
	
	
	*Postpartum content of care
desc pf41_1_a pf41_2_a pf41_3_a pf41_4_a pf41_5_a pf41_6_a pf41_7_a pf41_8_a pf41_13_a 
local ppcc pf41_1_a pf41_2_a pf41_3_a pf41_4_a pf41_5_a pf41_6_a pf41_7_a pf41_8_a pf41_13_a
foreach var of local ppcc {
		di "`var'"
		replace `var' = . if `var' == .d | `var' == .n | `var' == .r
		replace `var' = 0 if `var' == 2
		tab `var'
	}


	*Postnatal content of care
tab sk03	//14 stillbirths, which are all missing postnatal care
desc pf43_1_a pf43_2_a pf43_3_a pf43_4_a pf43_5_a pf43_6_a
local pncc pf43_1_a pf43_2_a pf43_3_a pf43_4_a pf43_5_a pf43_6_a
foreach var of local pncc {
		replace `var' = . if `var' == .d | `var' == .n | `var' == .r
		replace `var' = 0 if `var' == 2
		tab `var'
	}
	
*****************************************************

***Impute missing values with the treatment assignment group mean***

	*Antenatal content of care
local acc pf06a pf06b pf06c pf06d pf06e pf06f pf06gh pf06i pf06jk pf07 pf08
	foreach var of local acc {
		bysort treatment: egen mean_`var' = mean(`var')
		replace `var' = mean_`var' if `var' == .
		drop mean_`var'
	}


	*Delivery content of care
local dcc pf17 pf18 pf19 pf20 pf21 pf22
	foreach var of local dcc {
		bysort treatment: egen mean_`var' = mean(`var')
		replace `var' = mean_`var' if `var' == .
		drop mean_`var'
	}
	
	*Postpartum content of care
local ppcc pf41_1_a pf41_2_a pf41_3_a pf41_4_a pf41_5_a pf41_6_a pf41_7_a pf41_8_a pf41_13_a
	foreach var of local ppcc {
		bysort treatment: egen mean_`var' = mean(`var')
		replace `var' = mean_`var' if `var' == .
		drop mean_`var'
	}

	*Postnatal content of care
local pncc pf43_1_a pf43_2_a pf43_3_a pf43_4_a pf43_5_a pf43_6_a
	foreach var of local pncc {
		bysort treatment: egen mean_`var' = mean(`var')
		replace `var' = mean_`var' if `var' == .
		*replace `var' = . if sk03 == 1	//This line, if executed, excludes still-births from the analysis
		drop mean_`var'
	}

*****************************************************
	
***Individual content of care outcomes***

egen antenatal_content = rowtotal(pf06a pf06b pf06c pf06d pf06e pf06f pf06gh pf06i pf06jk pf07 pf08), missing
egen delivery_content = rowtotal(pf17 pf18 pf19 pf20 pf21 pf22), missing
egen postpartum_content = rowtotal(pf41_1_a pf41_2_a pf41_3_a pf41_4_a pf41_5_a pf41_6_a pf41_7_a pf41_8_a pf41_13_a), missing
egen postnatal_content = rowtotal(pf43_1_a pf43_2_a pf43_3_a pf43_4_a pf43_5_a pf43_6_a), missing

*****************************************************

***Content of care index***	

	*1. All variables are already oriented so that higher values represent "better" outcomes
	
	*2. Standardize each outcome - using the mean and s.d. of the control group
	local content antenatal_content delivery_content postpartum_content postnatal_content
	foreach var of local content {
		sum `var' if treatment == 0
		scalar mean_`var' = r(mean)
		scalar sd_`var' = r(sd)
		g z_`var' = (`var' - mean_`var')/sd_`var'
	}
	
	*3. Missing value imputation done
	
	*4. Compile summary index
	egen z_content = rowmean(z_antenatal_content z_delivery_content z_postpartum_content z_postnatal_content)
	sum z_content
	
********************************************************************************

* PREPARING PRIMARY OUTCOMES - RQ.3

desc pf49* pf50* sk02a sk02b sk05
	
	//pf49_a, pf49_b and pf49_c are the weights of the mother
	//pf49_d, pf49_e and pf49_f are the weights of the mother + baby


***Examining data on weight***

tab pf49_x
tab sk03 if pf49_x != 1
tab sk07 if pf49_x != 1
tab consent_child1 if pf49_x != 1 & sk03 == 2 & sk07 == 1
tab consent_child2 if pf49_x != 1 & sk03 == 2 & sk07 == 1
tab assent_child2 if pf49_x != 1 & sk03 == 2 & sk07 == 1
	
	/*	41 missing values. Of these:
			14 were stillbirths.
			9 infants were no longer alive.
			Of the remaining 18, 14 mothers did not consent to being measured.
			There are 4 other missing values, with reasons specified in pf49_reason.
	*/
	
tab pf49_c
g weight_diff_ab = abs(pf49_a - pf49_b)
tab weight_diff_ab
count if weight_diff_ab >= 0.1 & weight_diff_ab != .

	/* 
		A third measurement was taken in 425 cases.
		In 419 cases, the difference between the first two readings was more than or equal to 0.1 kg. 
	
	*/

tab pf49_f
g weight_diff_de = abs(pf49_d - pf49_e)
tab weight_diff_de
count if weight_diff_de >= 0.1 & weight_diff_de != .

	/* 
		A third measurement was taken in 130 cases.
		In 123 cases, the difference between the first two readings was more than or equal to 0.1 kg. 
	
	*/
	
	
***Examining data on height***
	
tab pf50_x
tab sk03 if pf50_x != 1
tab sk07 if pf50_x != 1
tab consent_child1 if pf50_x != 1 & sk03 == 2 & sk07 == 1
tab consent_child2 if pf50_x != 1 & sk03 == 2 & sk07 == 1
tab assent_child2 if pf50_x != 1 & sk03 == 2 & sk07 == 1

	/*	83 missing values. Of these:
			14 were stillbirths.
			9 infants were no longer alive.
			Of the remaining 60, 14 mothers did not consent to their child being measured.
			Reasons for other missing values are specified in pf50_reason.
	*/
	
tab pf50_c
g height_diff_ab = abs(pf50_a - pf50_b)
tab height_diff_ab
count if height_diff_ab >= 0.7 & height_diff_ab != .

	/* 
		A third measurement was taken in 31 cases.
		In 31 cases, the difference between the first two readings was more than or equal to 0.7 cm. 
	
	*/
	
	
***WEIGHT -- Combine the different readings into a single measure***
	
	//Combined - Mother + Infant
g weight_diff_bc = abs(pf49_b - pf49_c)
g weight_diff_ac = abs(pf49_a - pf49_c) 
egen least_weight_diff = rowmin(weight_diff_ab weight_diff_bc weight_diff_ac) 
tab least_weight_diff	//At least two of three readings were exactly the same in 99% cases
egen weight_ab = rowmean(pf49_a pf49_b)
egen weight_bc = rowmean(pf49_b pf49_c)
egen weight_ac = rowmean(pf49_a pf49_c)

g weight_combined = .
replace weight_combined = weight_ab if least_weight_diff == weight_diff_ab
replace weight_combined = weight_bc if ((least_weight_diff == weight_diff_bc) & (pf49_c != .)) 
replace weight_combined = weight_ac if ((least_weight_diff == weight_diff_ac) & (pf49_c != .))

tab weight_combined
drop least_weight_diff

	//Mother only
g weight_diff_ef = abs(pf49_e - pf49_f)
g weight_diff_df = abs(pf49_d - pf49_f) 
egen least_weight_diff = rowmin(weight_diff_de weight_diff_ef weight_diff_df) 
tab least_weight_diff	//At least two of three readings were exactly the same in 99.72% cases
egen weight_de = rowmean(pf49_d pf49_e)
egen weight_ef = rowmean(pf49_e pf49_f)
egen weight_df = rowmean(pf49_d pf49_f)

g weight_mother = .
replace weight_mother = weight_de if least_weight_diff == weight_diff_de
replace weight_mother = weight_ef if ((least_weight_diff == weight_diff_ef) & (pf49_f != .)) 
replace weight_mother = weight_df if ((least_weight_diff == weight_diff_df) & (pf49_f != .))

tab weight_mother

drop *weight_diff* weight_ab weight_bc weight_ac weight_de weight_ef weight_df

	//Infant only
g weight_infant = weight_combined - weight_mother
tab weight_infant
replace weight_infant = weight_infant * (-1) if weight_infant < 0
	
		/*
			//6 cases where weight_infant < 0
				preserve
					keep if weight_infant < 0 
					keep hhid1 fo_name lk02 pf49* weight_infant
					export excel using "$analysis/Check weight measurement.xlsx", firstrow(variables)
				restore
			
			The field team confirmed that these were cases where the variables are flipped, i.e.
			enumerators recorded Mother only first, and then Mother + baby. 
			
			So all that needs to be done is to reverse the order of these measurements,
			i.e. multiply the calculated weight by -1.
		*/

***HEIGHT -- Combine the different readings into a single measure***

g height_diff_bc = abs(pf50_b - pf50_c)
g height_diff_ac = abs(pf50_a - pf50_c) 
egen least_height_diff = rowmin(height_diff_ab height_diff_bc height_diff_ac) 
egen height_ab = rowmean(pf50_a pf50_b)
egen height_bc = rowmean(pf50_b pf50_c)
egen height_ac = rowmean(pf50_a pf50_c)

g height = .
replace height = height_ab if least_height_diff == height_diff_ab
replace height = height_bc if ((least_height_diff == height_diff_bc) & (pf50_c != .)) 
replace height = height_ac if ((least_height_diff == height_diff_ac) & (pf50_c != .))
drop *height_diff* height_*


***Age of the infant***

tab sk02a
tab sk02b		//2 missing values
g sk02_day = 15
	g sk02_year = 2017 if sk02a == 1
	replace sk02_year = 2018 if sk02a == 2
g date_birth = mdy(sk02b, sk02_day, sk02_year)
format date_birth %d

g days_between = enddate-date_birth
g age_in_months = round(days_between/30) 

	/*
		//65 cases where age_in_months < 0
			preserve
				keep if age_in_months < 0 
				keep hhid1 fo_name lk02 sk02a sk02b startdate enddate submissiondate
				export excel using "$analysis/Check survey date and birth date.xlsx", firstrow(variables)
			restore
	
		The field team confirmed that the majority of these cases (at least 54 of 65) are a simple error
		in entering the year - the enumerator selected 2018 instead of 2017. 
		
		The way to fix this is to flag the relevant HHIDs, merge them, and then change sk02_year from
		2018 to 2017.
		
	*/
	
	//Flagging the relevant HHIDs
		preserve
			g flag_sk02a = (age_in_months < 0)
			tab flag_sk02a
			keep if age_in_months < 0
			keep hhid1 flag_sk02a
			save "$data/error_in_birth_year.dta", replace
		restore


merge 1:1 hhid1 using "$data/error_in_birth_year.dta"
replace sk02a = 1 if flag_sk02a == 1
drop _merge flag_sk02a

drop days_between age_in_months
g days_between = enddate-date_birth
g age_in_months = round(days_between/30) 
drop days_between

erase "$data/error_in_birth_year.dta"

**Sex of the infant**

tab sk05
count if sk05 == . & sk03 == 1	
tab sk03
	//14 missing values, all still-births
ren sk05 sex_of_infant


***Z-score: Height-for-age***

egen haz = zanthro(height, ha, WHO), xvar(age_in_months) gender(sex_of_infant) gencode(male=1, female=2) ageunit(month)

	//haz excludes outliers (i.e. those more than 5 s.d. away from the mean). It has 246 missing values.	
	
***Z-score: Weight-for-age***

egen waz = zanthro(weight_infant, wa, WHO), xvar(age_in_months) gender(sex_of_infant) gencode(male=1, female=2) ageunit(month)

	//waz excludes outliers (i.e. those more than 5 s.d. away from the mean). It has 94 missing values.	
	
***Creating outcomes**
g stunted = (haz < -2)
replace stunted = . if haz == .
	g missing_stunted = (stunted == .)
	bysort treatment: tab missing_stunted
		//136 missing values in Control, 107 in Treatment. Balanced. 

g underweight = (waz < -2)
replace underweight = . if waz == .
	g missing_underweight = (underweight == .)
	bysort treatment: tab missing_underweight
		//Missing values are balanced.
	
********************************************************************************

* PREPARING PRIMARY OUTCOMES - RQ. 4

***Participation***
desc ks03 ks07 ks09a ks09b ks09c ks09d ks09e ks09f ks09g
local participation ks03 ks07 ks09a ks09b ks09c ks09d ks09e ks09f ks09g
foreach var of local participation {
	tab `var'
	count if `var' == .d
	count if `var' == .n
	count if `var' == .r
	replace `var' = . if (`var' == .d | `var' == .n | `var' == .r)
}

g participation_2 = (ks07 != 1)
replace participation_2 = . if ks07 == .

local participation_dummy ks03 ks09a ks09b ks09c ks09d ks09e ks09f ks09g
foreach var of local participation_dummy {
	g `var'_dummy = (`var' == 1)
	replace `var'_dummy = . if `var' == .
	tab `var'
	tab `var'_dummy
}

rename ks03_dummy participation_1

g participation_3 = (ks09a_dummy == 1 | ks09b_dummy == 1 | ks09c_dummy == 1 | ///
	ks09d_dummy == 1 | ks09e_dummy == 1 | ks09f_dummy == 1 | ks09g_dummy == 1)
replace participation_3 = . if (ks09a_dummy == . | ks09b_dummy == . | ks09c_dummy == . | ///
	ks09d_dummy == . | ks09e_dummy == . | ks09f_dummy == . | ks09g_dummy == .)
	//372 missing values (i.e. 6%)
	
order participation_1, before(participation_2)
order participation_3, after(participation_2)
drop *_dummy

*Creating index of participation
	
	*1. All variables are already oriented so that the higher value (1) represents the "better" outcome
	
	*2. Standardize each outcome - using the mean and s.d. of the control group
	sum participation_1 if treatment == 0
	scalar mean_1 = r(mean)
	scalar sd_1 = r(sd)
	g z_participation_1 = (participation_1 - mean_1)/sd_1
	tab z_participation_1
	
	sum participation_2 if treatment == 0
	scalar mean_2 = r(mean)
	scalar sd_2 = r(sd)
	g z_participation_2 = (participation_2 - mean_2)/sd_2
	tab z_participation_2
	
	sum participation_3 if treatment == 0
	scalar mean_3 = r(mean)
	scalar sd_3 = r(sd)
	g z_participation_3 = (participation_3 - mean_3)/sd_3
	tab z_participation_3
	
	*3. Impute missing values at treatment assignment group mean
	foreach var in z_participation_1 z_participation_2 z_participation_3 {
		bysort treatment: egen mean_`var' = mean(`var')
		replace `var' = mean_`var' if `var' == .
		drop mean_`var'
		tab `var'
	} 
	
	*4. Compile summary index
	egen participation = rowmean(z_participation_1 z_participation_2 z_participation_3) 
	
	
***Empowerment vignettes***
desc ks11 ks12*
foreach var in ks11 ks12a ks12b ks12c {
	tab `var'
}	

lab drop ks11
replace ks11 = 5 - ks11	//Reorienting outcome so that higher values represent "better" outcomes
		
********************************************************************************

* RESULTS - TABLE

gl strata = "strata1 strata2 strata3 strata4 strata5 strata6 strata7"

***Label variables***
lab var anc_first_trimester "ANC visit within first trimester" 
lab var number_anc_visits_binary "Four or more ANC visits"
lab var skilled_provider_birth "Birth with a skilled provider"
lab var facility_birth "Birth at a facility"
lab var postpartum_postnatal_care "Postnatal care"
lab var z_content "Content of care"
lab var stunted "Stunting"
lab var underweight "Underweight"
lab var participation "Empowerment - Participation"
lab var ks11 "Empowerment - Vignette"
	
***Produce Excel output***
cd "$analysis"

//Create the table shell
putexcel set "primary_outcome.xlsx", sheet("Primary Outcomes") replace

putexcel A1=("Outcome") B1=("Treatment Mean") C1=("Control Mean") D1=("Impact") ///
E1=("p-value") F1=("Effect Size") G1=("Sample Size") A13=("Number of Respondents") ///
A14=("Number of villages") B13=(2971) C13=(3037) B14=(100) C14=(100) ///
A16=("Treatment means are regression adjusted") A17=("*** p<0.01, ** p<0.05, * p<0.1")

//Export variable label, regression coefficient, control mean, treatment mean, p-value, sample size
//Effect sizes are exported separately (see below)
local outcomes anc_first_trimester number_anc_visits_binary skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11 participation z_content 
local row = 2
foreach var of local outcomes {
	sleep 2000
	local varlabel : var label `var'
	
	qui reg `var' treatment $strata, vce(cluster facility_id) 
	
	local p = 2*ttail(e(df_r), abs(_b[treatment]/_se[treatment]))
	local stars
		if `p' < 0.10 local stars = "*" 
		if `p' < 0.05 local stars = "**" 
		if `p' < 0.01 local stars = "***"
	local impact = _b[treatment]
	
	qui sum `var' if treatment == 0
	putexcel A`row' = ("`varlabel'")
	putexcel B`row' = ((_b[treatment]) + (r(mean)))
	putexcel C`row' = (r(mean))
	putexcel D`row' = ("`impact'" + "`stars'")
	putexcel E`row' = (`p')
	putexcel G`row' = (e(N))
	local ++row
}

//Export effect sizes for outcomes that aren't already standardized
local simple_outcomes anc_first_trimester number_anc_visits_binary skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11
local row = 2
foreach var of local simple_outcomes {
	qui reg `var' treatment $strata, vce(cluster facility_id) 
	qui sum `var' if treatment == 0
	putexcel F`row' = (_b[treatment]/r(sd))
	local ++row
}

//Export effect sizes for outcomes that are already standardized
local std_outcomes participation z_content 
local row = 10
foreach var of local std_outcomes {
	qui reg `var' treatment $strata, vce(cluster facility_id) 
	putexcel F`row' = (_b[treatment])
	local ++row
}

********************************************************************************

* RESULTS - GRAPH

	/*
	
	A total of 10 primary outcomes are analyzed above. 
	
	1. Eight of them are not standardized.
	anc_first_trimester number_anc_visits_binary skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11
	
	2. Two of them are standardized
	participation z_content
	
	*/

***Non-standardized outcomes***

local outcomes anc_first_trimester number_anc_visits_binary skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11 
foreach var of local outcomes {
	
	*Dividing by standard deviation of control group, to create a transformed outcome
	qui sum `var' if treatment == 0
	local sd_`var' = r(sd)
	g `var'_sd = `var'/r(sd)
	
	*Running regression on the transformed outcome
	qui reg `var'_sd treatment $strata, vce(cluster facility_id) 
	estimates store e_`var'
}

***Standardized outcomes***
	
local outcomes participation z_content
foreach var of local outcomes {
	qui reg `var' treatment $strata, vce(cluster facility_id) 
	estimates store e_`var'
}	


***Coefficient plots***

*X-axis: -0.4 to 0.4
#delimit ;
coefplot (e_anc_first_trimester, aseq("ANC visit within first trimester")
		\ e_number_anc_visits_binary, aseq("Four or more ANC visits")
		\ e_skilled_provider_birth, aseq("Birth with a skilled provider") 
		\ e_facility_birth, aseq("Birth at a facility")
		\ e_postpartum_postnatal_care, aseq("Postnatal care")
		\ e_z_content, aseq("Content of care")
		\ e_stunted, aseq("Stunting")
		\ e_underweight, aseq("Underweight")
		\ e_participation, aseq("Empowerment - Participation")
		\ e_ks11, aseq("Empowerment - Vignette"))
	, keep(treatment) xline(0) swapnames title("Tanzania", size(medium))
	  note("Note: Displaying 95% confidence intervals" "Higher values represent 'better' outcomes, except for stunting and underweight", size(vsmall)) 
	  xtitle("Effect Size (Standard Deviations)", size(small))
	  xlabel(-0.4(0.1)0.4) scheme(s2mono)
;	

#delimit cr

graph export "primary_outcomes_effect_sizes.png", as(png) replace

	
********************************************************************************

* ADDING BASELINE OUTCOME TO REGRESSION SPECIFICATION

merge m:1 village_id using "$data/baseline_outcomes_by_village.dta"

cap erase "primary_outcomes_with_baseline.xls"

local primary anc_first_trimester number_anc_visits_binary skilled_provider_birth facility_birth participation ks11 
foreach var of local primary {
	di "`var'"
	reg `var' treatment $strata m_bl_`var', vce(cluster facility_id)
	outreg2 using "primary_outcomes_with_baseline.xls", append label keep(treatment m_bl_`var')
}

cap erase "primary_outcomes_with_baseline.txt"

********************************************************************************

* POWER CALCULATIONS

	*For primary outcomes: What effect size is the analysis powered to detect?

bysort village_id: g cluster_size = _N

	//Create the table shell
putexcel set "primary_outcome_mde.xlsx", sheet("MDE") replace
putexcel A1 = ("Outcome") B1=("Endline Mean") C1=("Detectable Difference") D1=("Detectable Difference (Std. Dev.)") 

	//Cluster size: Coefficient of variation
sum cluster_size
local cv = r(sd)/r(mean)
local row = 2

	//Calculate the detectable difference
local outcomes anc_first_trimester number_anc_visits_binary skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11 participation z_content 
foreach var of local outcomes  {
	
	local varlabel : var label `var'
	
	loneway `var' village_id
	local rho = r(rho)
	
	sum  `var' 
	local mean_`var'= r(mean)
	local sd_`var' = r(sd)	
	
	sum `var' if treatment == 0
	local c_`var' = r(sd)
	
	clustersampsi, detectabledifference mu1(`mean_`var'') m(30) k(100) rho(`rho') sd1(`sd_`var'') size_cv(`cv')
	local dd_`var' = (r(DD))
	local dd_`var'2 = `dd_`var''/`c_`var''
	
	putexcel A`row' = ("`varlabel'") 
	putexcel B`row' = ("`mean_`var''")
	putexcel C`row' = ("`dd_`var''")	
	putexcel D`row' = ("`dd_`var'2'")
	local ++row
}

	//For outcomes that were previously already standardized: Column D (in std. dev. units) is irrelevant
putexcel D10 = (" ")
putexcel D11 = (" ") 

********************************************************************************
* SUBGROUP ANALYSES
********************************************************************************

/* LIST OF SUB-GROUP CATEGORIES AND THE ASSOCIATED VARIABLES

	
	1. Region - lk01
	
	2. Time of birth - age_in_months (variable created in 2_a_hh)
	
	3. Facility characteristics
			
*/

***Category 1***
tab lk01
g dodoma = (lk01 == 2)
g indicator_dodoma_treatment = dodoma*treatment
tab indicator_dodoma_treatment
lab var dodoma "Dodoma"
lab var indicator_dodoma_treatment "Treatment*Dodoma"

***Category 2***
g younger_birth = (age_in_months <= 6)
replace younger_birth = . if age_in_months == .
g indicator_younger_treatment = younger_birth*treatment
tab indicator_younger_treatment
lab var younger_birth "Younger infant: Age <= 6 months"
lab var indicator_younger_treatment "Treatment*Younger infant"


***Category 3***

drop _merge
merge m:1 health_facility_id using "$data/Baseline_facility_quality.dta"
drop _merge

g high_capacity = (capacity_a >= 9 & capacity_a <= 12)
g low_capacity = (capacity_a >= 0 & capacity_a <= 8)

g treatment_high_capacity = treatment*high_capacity
g treatment_low_capacity = treatment*low_capacity

lab var treatment_high_capacity "Treatment*High quality"
lab var treatment_low_capacity "Treatment*Low quality" 

********************************************************************************

* GRAPH - SUBGROUP ANALYSIS BY REGION

cd "$analysis"

drop anc_first_trimester_sd-_est_e_ks11

	/*
	
	A total of 10 primary outcomes are analyzed above. 
	
	1. Eight of them are not standardized.
	anc_first_trimester number_anc_visits_binary skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11
	
	2. Two of them are standardized
	participation z_content
	
	*/

***Non-standardized outcomes***

local outcomes anc_first_trimester number_anc_visits_binary skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11 
foreach var of local outcomes {
	
	*Dividing by standard deviation of control group, to create a transformed outcome
	qui sum `var' if treatment == 0
	local sd_`var' = r(sd)
	g `var'_sd = `var'/r(sd)
	
	*Running regression on the transformed outcome
	qui reg `var'_sd treatment $strata dodoma indicator_dodoma_treatment, vce(cluster facility_id)
	estimates store e_`var'
}

***Standardized outcomes***
	
local outcomes participation z_content
foreach var of local outcomes {
	qui reg `var' treatment $strata dodoma indicator_dodoma_treatment, vce(cluster facility_id)
	estimates store e_`var'
}	


***Coefficient plot***

*X-axis: -0.4 to 0.4
#delimit ;
coefplot (e_anc_first_trimester, aseq("ANC visit within first trimester")
		\ e_number_anc_visits_binary, aseq("Four or more ANC visits")
		\ e_skilled_provider_birth, aseq("Birth with a skilled provider") 
		\ e_facility_birth, aseq("Birth at a facility")
		\ e_postpartum_postnatal_care, aseq("Postnatal care")
		\ e_z_content, aseq("Content of care")
		\ e_stunted, aseq("Stunting")
		\ e_underweight, aseq("Underweight")
		\ e_participation, aseq("Empowerment - Participation")
		\ e_ks11, aseq("Empowerment - Vignette"))
	, keep(indicator_dodoma_treatment) xline(0) swapnames title("Tanzania", size(medium))
	  note("Note: Displaying 95% confidence intervals" "Higher values represent 'better' outcomes, except for stunting and underweight", size(vsmall)) 
	  xtitle("Interaction: Treatment*Dodoma Region (Standard Deviations)", size(small))
	  xlabel(-0.4(0.1)0.4) scheme(s2mono)
;	

#delimit cr

graph export "$analysis/subgroup_analysis_region.png", as(png) replace

********************************************************************************

* GRAPH - TIME OF BIRTH

drop anc_first_trimester_sd-_est_e_ks11

***Non-standardized outcomes***

local outcomes anc_first_trimester number_anc_visits_binary skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11 
foreach var of local outcomes {
	
	*Dividing by standard deviation of control group, to create a transformed outcome
	qui sum `var' if treatment == 0
	local sd_`var' = r(sd)
	g `var'_sd = `var'/r(sd)
	
	*Running regression on the transformed outcome
	qui reg `var'_sd treatment $strata younger_birth indicator_younger_treatment, vce(cluster facility_id)
	estimates store e_`var'
}

***Standardized outcomes***
	
local outcomes participation z_content
foreach var of local outcomes {
	qui reg `var' treatment $strata younger_birth indicator_younger_treatment, vce(cluster facility_id)
	estimates store e_`var'
}	


***Coefficient plot***

*X-axis: -0.4 to 0.4
#delimit ;
coefplot (e_anc_first_trimester, aseq("ANC visit within first trimester")
		\ e_number_anc_visits_binary, aseq("Four or more ANC visits")
		\ e_skilled_provider_birth, aseq("Birth with a skilled provider") 
		\ e_facility_birth, aseq("Birth at a facility")
		\ e_postpartum_postnatal_care, aseq("Postnatal care")
		\ e_z_content, aseq("Content of care")
		\ e_stunted, aseq("Stunting")
		\ e_underweight, aseq("Underweight")
		\ e_participation, aseq("Empowerment - Participation")
		\ e_ks11, aseq("Empowerment - Vignette"))
	, keep(indicator_younger_treatment) xline(0) swapnames title("Tanzania", size(medium))
	  note("Note: Displaying 95% confidence intervals" "Higher values represent 'better' outcomes, except for stunting and underweight", size(vsmall)) 
	  xtitle("Interaction: Treatment*Earlier birth (Standard Deviations)", size(small))
	  xlabel(-0.4(0.1)0.4) scheme(s2mono)
;	

#delimit cr

graph export "$analysis/subgroup_analysis_time_of_birth.png", as(png) replace

********************************************************************************

* GRAPH - PROVIDER CAPACITY

drop anc_first_trimester_sd-_est_e_ks11

local outcomes anc_first_trimester number_anc_visits_binary skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11 
foreach var of local outcomes {
	
	*Dividing by standard deviation of control group, to create a transformed outcome
	qui sum `var' if treatment == 0
	local sd_`var' = r(sd)
	g `var'_sd = `var'/r(sd)
	
	*Running regression on the transformed outcome
	reg `var'_sd treatment $strata high_capacity treatment_high_capacity, vce(cluster facility_id)
	estimates store e_`var'
}
	
local outcomes participation z_content
foreach var of local outcomes {
	reg `var' treatment $strata high_capacity treatment_high_capacity, vce(cluster facility_id)
	estimates store e_`var'
}	

#delimit ;
coefplot (e_anc_first_trimester, aseq("ANC visit within first trimester")
		\ e_number_anc_visits_binary, aseq("Four or more ANC visits")
		\ e_skilled_provider_birth, aseq("Birth with a skilled provider") 
		\ e_facility_birth, aseq("Birth at a facility")
		\ e_postpartum_postnatal_care, aseq("Postnatal care")
		\ e_z_content, aseq("Content of care")
		\ e_stunted, aseq("Stunting")
		\ e_underweight, aseq("Underweight")
		\ e_participation, aseq("Empowerment - Participation")
		\ e_ks11, aseq("Empowerment - Vignette"))
	, keep(treatment_high_capacity) xline(0) swapnames title("Tanzania: Health service capacity", size(medium))
	  note("Notes: Displaying 95% confidence intervals" "Higher values represent 'better' outcomes, except for stunting and underweight", size(vsmall)) 
	  xtitle("Interaction: Treatment*High quality facility (Standard Deviations)", size(small))
	  xlabel(-0.6(0.1)0.6) scheme(s2mono)
;	

#delimit cr

graph export "$analysis/subgroup_analysis_provider_capacity_high.png", as(png) replace

********************************************************************************

clear
		
